{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import datetime as dt\n",
    "import numpy as np\n",
    "import os\n",
    "import gc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#显示所有列\n",
    "pd.set_option('display.max_columns', None)\n",
    "#显示所有行\n",
    "pd.set_option('display.max_rows', None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 转换类型\n",
    "# def trans_dtype(data):\n",
    "#     '''\n",
    "#     input:\n",
    "#     data(DataFrame)\n",
    "#     retrun;\n",
    "#     new_data(DataFrame)\n",
    "#     '''\n",
    "#     type_dict = data.dtypes.to_dict()\n",
    "#     for key,value in type_dict.items():\n",
    "#         if value == np.int64:\n",
    "#             type_dict[key] = np.int32\n",
    "#         if value == np.float64:\n",
    "#             type_dict[key] = np.float32\n",
    "#     new_data = data.astype(type_dict)\n",
    "#     return new_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if not os.path.exists('../preprocess_data'):\n",
    "    os.mkdir('../preprocess_data')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data():\n",
    "    '''\n",
    "    描述：\n",
    "        1.读入5个训练数据片段并完成拼接，得到共计10万条样本。\n",
    "        根据label的有无，划分数据，并输出train_y_33465.csv、train_x_33465.csv、unlabel_y_66535.csv、unlabel_x_66535.csv\n",
    "        2.读入valid（分隔符为'\\t'），输出valid.csv（分隔符为','）\n",
    "    '''\n",
    "    input_path = '../data/open_data_train_valid/'\n",
    "    output_path = '../preprocess_data/'\n",
    "    \n",
    "    columns = ['id','loan_dt','label','tag']+['f%d'%(i+1)for i in range(6745)]\n",
    "    train_path_1 = input_path + 'train/train_1.txt'\n",
    "    train_path_2 = input_path + 'train/train_2.txt'\n",
    "    train_path_3 = input_path + 'train/train_3.txt'\n",
    "    train_path_4 = input_path + 'train/train_4.txt'\n",
    "    train_path_5 = input_path + 'train/train_5.txt'\n",
    "    \n",
    "    valid_path = input_path + 'valid.txt'\n",
    "    test_path = '../data/open_data_test/test_id.txt'\n",
    "    \n",
    "    train_1 = pd.read_csv(train_path_1, delimiter='\\t')\n",
    "    train_2 = pd.read_csv(train_path_2, delimiter='\\t' , header = None)\n",
    "    train_3 = pd.read_csv(train_path_3, delimiter='\\t' , header = None)\n",
    "    train_4 = pd.read_csv(train_path_4, delimiter='\\t' , header = None)\n",
    "    train_5 = pd.read_csv(train_path_5, delimiter='\\t' , header = None)\n",
    "    train_2.columns = columns\n",
    "    train_3.columns = columns\n",
    "    train_4.columns = columns\n",
    "    train_5.columns = columns\n",
    "    print('读取完train data！')\n",
    "    \n",
    "    train = pd.concat([train_1,train_2,train_3,train_4,train_5],axis = 0,ignore_index=True)\n",
    "    print('train data拼接完成！')\n",
    "    \n",
    "    valid = pd.read_csv(valid_path, delimiter='\\t')\n",
    "    print('读取完 valid data!')\n",
    "    \n",
    "    test = pd.read_csv(test_path, delimiter='\\t')\n",
    "    print('读取完 test data!')\n",
    "    \n",
    "    unlabel = train.loc[train.label.isnull()]\n",
    "    train = train.loc[train.label.isnull()==False]\n",
    "    \n",
    "    train_y = train[['label']]\n",
    "    train_x = train.drop(columns=['label'])\n",
    "    unlabel_y = unlabel[['label']]\n",
    "    unlabel_x = unlabel.drop(columns=['label'])\n",
    "    \n",
    "    \n",
    "    train_y.to_csv(output_path + 'train_y_33465.csv', index = False)\n",
    "    train_x.to_csv(output_path + 'train_x_33465.csv', index = False)\n",
    "    print('输出train')\n",
    "\n",
    "    unlabel_y.to_csv(output_path + 'unlabel_y_66535.csv', index = False)\n",
    "    unlabel_x.to_csv(output_path + 'unlabel_x_66535.csv', index = False)\n",
    "    print('输出unlabel')\n",
    "    \n",
    "    valid.to_csv(output_path + 'valid.csv', index = False)\n",
    "    print('输出valid')\n",
    "    \n",
    "    test.to_csv(output_path + 'test.csv', index = False)\n",
    "    print('输出test')\n",
    "\n",
    "    return train_x, unlabel_x, valid, test\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 0 ns, sys: 0 ns, total: 0 ns\n",
      "Wall time: 5.01 µs\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "train_x, unlabel_x, valid, test = load_data()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.提取 month 和 day"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "def gen_mon_day_hour(data, file_name):\n",
    "    '''\n",
    "    描述：\n",
    "        把'loan_dt'转化为loan_month、loan_day\n",
    "    参数：\n",
    "        input:\n",
    "            data(DataFrame): 有列loan_dt\n",
    "            file_name(str)\n",
    "        output:\n",
    "            data(DataFrame)： 删除列loan_dt, 添加列loan_month和loan_day\n",
    "    '''\n",
    "    data['loan_dt'] = pd.to_datetime(data.loan_dt)\n",
    "    data_date = data[['id']].copy()\n",
    "    data_date['loan_month'] = data.loan_dt.apply(lambda x: x.month)\n",
    "    data_date['loan_day'] = data.loan_dt.apply(lambda x: x.day)\n",
    "    data_date.drop(columns=['loan_dt'], inplace =True)\n",
    "    \n",
    "\n",
    "    data_date.to_csv(os.path.join('../preprocess_data','%s_date.csv'%file_name), index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.68 s, sys: 16 ms, total: 1.7 s\n",
      "Wall time: 1.83 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "gen_mon_day_hour(train_x, 'train_x')\n",
    "gen_mon_day_hour(unlabel_x, 'unlabel_x')\n",
    "gen_mon_day_hour(valid, 'valid')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.(abandoned)统计n_null和discret_null"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gen_null(train_x, unlabel_x, valid):\n",
    "    '''\n",
    "    描述：\n",
    "        1.统计每一样本的缺失值数量n_null\n",
    "        2.将n_null等量划分为10段，得到特征discret_null\n",
    "    参数:\n",
    "        input:\n",
    "            train_x(DataFrame),unlabel_x(DataFrame),valid(DataFrame)\n",
    "        output:\n",
    "            对应的csv\n",
    "    \n",
    "    '''\n",
    "    # n_null\n",
    "    train_x_null = train_x[['id']].copy() \n",
    "    train_x = train_x.drop(columns =['id', 'tag']) \n",
    "    train_x_null['n_null'] = (train_x.isnull()).sum(axis=1)\n",
    "    \n",
    "    unlabel_x_null = unlabel_x[['id']].copy() \n",
    "    unlabel_x = unlabel_x.drop(columns =['id','tag']) \n",
    "    unlabel_x_null['n_null'] = (unlabel_x.isnull()).sum(axis=1)\n",
    "    \n",
    "    valid_null = valid[['id']].copy()\n",
    "    valid = valid.drop(columns =['id'])\n",
    "    valid_null['n_null'] = (valid.isnull()).sum(axis=1)\n",
    "    \n",
    "    # 展示n_null的分布\n",
    "    import matplotlib.pyplot as plt\n",
    "    %matplotlib inline\n",
    "    plt.plot(train_x_null['n_null'].sort_values(ascending=True).values)\n",
    "    plt.plot(unlabel_x_null['n_null'].sort_values(ascending=True).values)\n",
    "    plt.plot(valid_null['n_null'].sort_values(ascending=True).values)\n",
    "    \n",
    "    plt.legend(['train_x','unlabel_x','valid'])\n",
    "\n",
    "    # 警示：三个数据的切分标准没有统一，这里是有问题的！'discret_null'不能被使用！\n",
    "    # 通过n_null的分布我们可以看到，三条曲线拐点是不一致的。\n",
    "    # 关于更加详细的n_null探索,见 2.explore_data板块\n",
    "\n",
    "    # pandas.qcut按分位数进行切分，所以是等量的。\n",
    "#     train_x['discret_null'] = pd.qcut(train_x_null['n_null'],10,labels=False,duplicates='drop')\n",
    "#     unlabel_x['discret_null'] = pd.qcut(unlabel_x_null['n_null'],10,labels=False,duplicates='drop')\n",
    "#     valid['discret_null'] = pd.qcut(valid_null['n_null'], 10,labels=False,duplicates='drop')\n",
    "    output_path = '../preprocess_data/'\n",
    "    train_x_null.to_csv(output_path+'train_x_null.csv', index = False)\n",
    "    unlabel_x_null.to_csv(output_path+'unlabel_x_null.csv', index = False)\n",
    "    valid_null.to_csv(output_path+'valid_null.csv', index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 57.1 s, sys: 1.58 s, total: 58.6 s\n",
      "Wall time: 58.7 s\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xl4VNXdwPHvyZ6QkD0hZCFhJ+wQFkVpkFcUxKVu4CtKrW+pipVWrWJbt6p9tfWRFl+19RUVKoqIIuiLWkCoyk6QHSQBQhYCSSY72TPn/ePeQEAgIZk18/s8zzxz59w7c38Dk/O795xzz1Vaa4QQQngeL2cHIIQQwjkkAQghhIeSBCCEEB5KEoAQQngoSQBCCOGhJAEIIYSHkgQghBAeShKAEEJ4KEkAQgjhoXycHcDFREVF6eTkZGeHIYQQbiUjI6NYax3d2nYunQCSk5PZvn27s8MQQgi3opQ61pbtpAlICCE8lCQAIYTwUJIAhBDCQ7l0H8D5NDQ0kJeXR21trbNDcXsBAQEkJCTg6+vr7FCEEE7QpgSglMoGKoEmoFFrnaaUigA+BJKBbOB2rXWpUkoBfwOmANXAz7TWO8zPmQn8wfzY57XWCy814Ly8PEJCQkhOTsbYlWgPrTUWi4W8vDxSUlKcHY4QwgkupQlogtZ6mNY6zXw9F1irte4DrDVfA0wG+piPWcAbAGbCeBoYA4wGnlZKhV9qwLW1tURGRkrl30FKKSIjI+VMSggP1pE+gBuB5iP4hcBNLcoXacNmIEwpFQdcA6zWWpdorUuB1cC17dmxVP62If+OQni2tiYADfxLKZWhlJpllsVqrQvM5RNArLkcD+S2eG+eWXahcuHmVh5eybJDy5wdhhDiErW1E/gKrXW+UioGWK2UOthypdZaK6VscnNhM8HMAkhKSrLFRwo7+78j/0dVQxW39r3V2aEIIS5Bm84AtNb55nMhsByjDf+k2bSD+Vxobp4PJLZ4e4JZdqHyc/f1ptY6TWudFh3d6pXMTlFWVsbrr79+ye+bMmUKZWVldohICCEuXasJQCnVRSkV0rwMTAL2AiuBmeZmM4EV5vJK4G5lGAuUm01FXwGTlFLhZufvJLPM7VwoATQ2Nl70fatWrSIsLMxeYTmN1jY5+RNCOFhbmoBigeVmh6EP8L7W+kul1DZgqVLqXuAYcLu5/SqMIaBZGMNA7wHQWpcopZ4Dtpnb/VFrXdKR4J/9bB/7j1d05CN+JLV7V56+fuBFt5k7dy6HDx9m2LBh+Pr6EhAQQHh4OAcPHuTQoUPcdNNN5ObmUltby5w5c5g1y+g2aZ7bqKqqismTJ3PFFVewceNG4uPjWbFiBYGBgT/aV2NjI5dddhl/+ctfSE9P54knnsDLy4sXXnjBpt+7oxTSoSyEzSy6EaL7w+SX7LqbVhOA1voIMPQ85RZg4nnKNTD7Ap/1NvD2pYfpWl588UX27t3Lzp07Wb9+Pddddx179+49PZ7+7bffJiIigpqaGkaNGsUtt9xCZGTkWZ+RmZnJBx98wP/+7/9y++238/HHHzNjxowf7cvHx4d3332XW2+9lVdffZUvv/ySLVu2OOR7CiGcpCwXgiJb366D3O5K4JZaO1J3lNGjR591MdX8+fNZvnw5ALm5uWRmZv4oAaSkpDBs2DAARo4cSXZ29gU/f+DAgdx1111MnTqVTZs24efnZ/sv0QEaaQISwrY0OOCs2q0TgKvo0qXL6eX169ezZs0aNm3aRFBQEOnp6ee92Mrf3//0sre3NzU1NRfdx549ewgLC6OwsPCi2zmLNAEJYUNagwOu05HJ4NohJCSEysrK864rLy8nPDycoKAgDh48yObNmzu8v08++YSSkhK++eYbfvWrX8lIIiE6O20FZf/qWc4A2iEyMpJx48YxaNAgAgMDiY2NPb3u2muv5e9//zsDBgygX79+jB07tkP7Ki4uZu7cuaxdu5bExEQefPBB5syZw8KFlzyNkt1IE5AQtiZNQC7t/fffP2+5v78/X3zxxXnXNbfzR0VFsXfv3tPljz766AX3ExUVxaFDh06/fuihh9oRrf3JtBJC2JBGmoCEEMIzaWkC8jSzZ89mw4YNZ5XNmTOHe+65x0kRCSGcQluRJiAP89prrzk7hPaRLgAhbEtrR9T/0gQkbEOGgQphS45pApIEIDpMRgEJYWMOagKSBCCEEK5GLgQTQghP5ZjrACQBOMD69euZOnXqRbd59913efDBBy/pc5OTkykuLu5IaDYhTUBC2JiWPgDhRuRCMCFsSFsd0gTk3sNAv5gLJ/bY9jO7DYbJL150k+zsbKZOnXr6at6XX36Zqqoq1q9fz5gxY1i3bh1lZWUsWLCAK6+88qz3bt26lTlz5lBbW0tgYCDvvPMO/fr1A4yZQ9PT08nPz2fGjBk8/fTTALz33nvMnz+f+vp6xowZw+uvv463t/dFY9y2bRv33nsvW7dupampidGjR/Phhx8yaNCg9v7LXJDcEEYIW5OpINxSY2MjW7duZdWqVTz77LOsWbPmrPX9+/fn22+/xcfHhzVr1vC73/2Ojz/+GDCSw969ewkKCmLUqFFcd911dOnShQ8//JANGzbg6+vLAw88wOLFi7n77rsvGseoUaO44YYb+MMf/kBNTQ0zZsywS+XfTIaBCmFDDmoCcu8E0MqRujPcfPPNwIXn+C8vL2fmzJlkZmailKKhoeH0uquvvvr0fQNuvvlmvvvuO3x8fMjIyGDUqFEA1NTUEBMT06ZYnnrqKUaNGkVAQADz58/v4DcTQjiMg0YBuXcCcBIfHx+sVuvp1y3n+2+e59/b2/u89wh+8sknmTBhAsuXLyc7O5v09PTT685tR1dKobVm5syZ/Pd///clx2mxWKiqqqKhoYHa2tqz7ltgSxotZwBC2JSMAnJZsbGxFBYWYrFYqKur4/PPP2/ze8vLy4mPjweMkT8trV69mpKSEmpqavj0008ZN24cEydOZNmyZadvBFNSUsKxY8fatK9f/vKXPPfcc9x55508/vjjbY6xPaQTWAgbkjMA1+Xr68tTTz3F6NGjiY+Pp3///m1+72OPPcbMmTN5/vnnue66685aN3r0aG655Rby8vKYMWMGaWlpADz//PNMmjQJq9WKr68vr732Gj169LjofhYtWoSvry//+Z//SVNTE5dffjlff/01V1111aV/4VZoB81b4koam6yUnKonJMCXQL+Ld8gLcekc0wegXHkER1pamt6+fftZZQcOHGDAgAFOiqjzscW/58wvZuLj5cOCaxbYKCrXl1tSzZV/XsfLtw3l1pEJzg5HdDYvxEHaz+GaF9r1dqVUhtY6rbXtpAlICCFcjTQBibawWCxMnDjxR+Vr1649PaJICOFuZBioaIPIyEh27tzp7DBkFJAQtiSzgQohhIeS2UCFu5DJ4ISwIa3B2ghevnbflSQAYRse1gLkwoPnhLuzNgEavCUBdArBwcEAHD9+nFtvvfW826Snp3PukFd34cpDie3Nw/KecASrOT2Ml/27aCUBOFD37t1ZtmyZs8OwC+kEFsJGmswEIGcArmnu3Lm89tprp18/88wzPP/880ycOJERI0YwePBgVqxY8aP3ZWdnn56Rs6amhunTpzNgwAB++tOfUlNT47D4hRAuzGrOIeaAPgC3Hgb60taXOFhy0Kaf2T+iP4+Pvvi8OdOmTePXv/41s2fPBmDp0qV89dVXPPTQQ3Tt2pXi4mLGjh3LDTfccME5ct544w2CgoI4cOAAu3fvZsSIETb9Ho4kncBC2NDpMwD7V89unQCcZfjw4RQWFnL8+HGKiooIDw+nW7du/OY3v+Gbb77By8uL/Px8Tp48Sbdu3c77Gd988w0PPfQQAEOGDGHIkCGO/Ao2J01AQtjI6T4AFzoDUEp5A9uBfK31VKVUCrAEiAQygLu01vVKKX9gETASsADTtNbZ5mc8AdwLNAEPaa2/6kjwrR2p29Ntt93GsmXLOHHiBNOmTWPx4sUUFRWRkZGBr68vycnJZ00T3Zl54hmAJ35n4SAu2gcwBzjQ4vVLwDytdW+gFKNix3wuNcvnmduhlEoFpgMDgWuB182k4pamTZvGkiVLWLZsGbfddhvl5eXExMTg6+vLunXrWp2yefz48bz//vsA7N27l927dzsibLvx1OmgPfRrC3tqctwZQJsSgFIqAbgOeMt8rYCrgOYhLQuBm8zlG83XmOsnmtvfCCzRWtdprY8CWcBoW3wJZxg4cCCVlZXEx8cTFxfHnXfeyfbt2xk8eDCLFi1qdYro+++/n6qqKgYMGMBTTz3FyJEjHRS5EMKlNdUbzz5+dt9VW5uA/go8BoSYryOBMq118y2v8oB4czkeyAXQWjcqpcrN7eOBzS0+s+V7TlNKzQJmASQlJbX5izjDnj1nbkgfFRXFpk2bzrtdVVUVAMnJyadvJB8YGMiSJUvsH6QjSGuIELbTVGc8e/vbfVetngEopaYChVrrDLtHA2it39Rap2mt06Kjox2xS2ED0gkshI00mgnAx/4JoC1nAOOAG5RSU4AAoCvwNyBMKeVjngUkAPnm9vlAIpCnlPIBQjE6g5vLm7V8j3Bj0iEqhA2d7gS2fxNQq2cAWusntNYJWutkjE7cr7XWdwLrgOZ5DWYCzVc+rTRfY67/WhtzBawEpiul/M0RRH2Are0J2pOnHrAlm/47ygmAELZhdc1RQOd6HHhYKZWF0cbffD/ABUCkWf4wMBdAa70PWArsB74EZmutmy51pwEBAVgsFkkCHaS1xmKxEBAQ4OxQ3JL8/ITdNDVfCWz/QZKXdCGY1no9sN5cPsJ5RvForWuB2y7w/heA9t3k0pSQkEBeXh5FRUUd+RiBkUwTEjp+P1tPTsYyDFTYnCteCOYqfH19SUlJcXYY4hzSCSyEjbjohWBCnJd0AgthQw6cDE4SgLAJOQMQwkasjusDkAQghBCupLlPTdm/epYEIDrME5uAPO8bC8dx3K9LEoCwCY+dDE6avoStnT4DsP9vSxKA6DBPHgYqhO01/z1JAhBuQo6EhbAxOQMQQggP48AzakkAQgjhUqQJSLgZaQISwkakE1i4E48cBiod38LuJAEId+GhJwAeOvpV2JX0AQghhGeSJiDhTqQ5RAhbkk5g4WakE1gIG5MzAOEOPLETWAi7kesAhLvxtDMASXnC/uQMQAghPIt0Agt3Ik1AQtiDJADhJjx1OmghbE/6AIQbkWGgQtiQNAEJd+NpncBC2I+cAQjh0uSkR9idnAEI4dqk70PYnFwHINyNVIRC2IpMBSHciHQCC2FD0gkshBCeThKAEEJ4GOkDEG5ErgQWwoakCUi4G8+7DkCSnrAX6QQWbsSTzwA8Le0JB3KFMwClVIBSaqtSapdSap9S6lmzPEUptUUplaWU+lAp5WeW+5uvs8z1yS0+6wmz/Ael1DX2+lLC8WQYqBA24mLXAdQBV2mthwLDgGuVUmOBl4B5WuveQClwr7n9vUCpWT7P3A6lVCowHRgIXAu8rpTytuWXEUII9+dCTUDaUGW+9DUfGrgKWGaWLwRuMpdvNF9jrp+ojMPDG4ElWus6rfVRIAsYbZNvIZxKrgMQwoZcrRNYKeWtlNoJFAKrgcNAmda60dwkD4g3l+OBXABzfTkQ2bL8PO9pua9ZSqntSqntRUVFl/6NhFN4XiewEPbmIglAa92ktR4GJGActfe3V0Ba6ze11mla67To6Gh77UaIDpGTHmE/rtUHcJrWugxYB1wGhCmlfMxVCUC+uZwPJAKY60MBS8vy87xHCLckfd/C5lypCUgpFa2UCjOXA4GrgQMYieBWc7OZwApzeaX5GnP919poJF4JTDdHCaUAfYCttvoiwrmkCUgIW3FcJ7BP65sQByw0R+x4AUu11p8rpfYDS5RSzwPfAwvM7RcA/1RKZQElGCN/0FrvU0otBfYDjcBsrXWTbb+OcAZPvg5ACLtxwBlAqwlAa70bGH6e8iOcZxSP1roWuO0Cn/UC8MKlhylcnpwACGEbDjyekiuBhRDCpbjQdQBCtEauAxDChlypE1iItvC0TmBJecL+JAEIN+DJncCelviEI7jodQBCXIhMBieEjUgTkBBCeCpJAMKNSCewEDakrThqXLUkAGET0hYuhI1YG8GrLdfodpwkANFhntgJLCc9wm6sTZIAhHvxtDOAJquRAby9POt7CweQBCCEa2tOAD6SAIStWRvByzFVsyQAIdqhwWoFwNtbEoCwMS1nAMLNeNp1AHIGIOxGOoGFO/HEYaCNTc0JQP6EhI1ZG0F5O2RX8usVoh1OnwFIE5CwNekEFu7EE4eBNjb3AUgTkLA1axN4yRmAcCOeOgxU+gCEzTXVgbevQ3YlCUCIdmiU6wCEvVSXQFCkQ3YlCUB0mFVbPW4UkHQCC7uptkgCEO6jrqkOf29/Z4fhUNIHIOzmVLEkAOE+6prqCPAOcHYYDiV9AMIu6irhVCGEJTpkd5IARIdoralprCHAx7MSgPQBCLsoPmQ8Rw9wyO4kAYgOqbfWA3hcApDrAIRdHN9pPMcOdMjuJAGIDqltrAXwuD6AsuoGALoGOGa4nvAQedsgKArCkx2yO0kAokPK6soA6OrX1cmROFZhZS3B/j508XfMFZvCQ+Rth8QxDrkdJEgCEB1UVF0EQExQjJMjcazCyjpiQjzrrEfYWU0ZWLKg+3CH7VISgOiQohoPTQAVtURLAhC2lLsV0BA/wmG7lAQgOqSwuhCAqMAoJ0fiWIWVdcR29ayOb2Fn+5aDXwj0GOewXUoCEB1SVF2Ev7e/R/UBaK0prJAmIGFDFQWw5yMYcjv4Ou7AQhKA6JCimiKiA6M9aiqIqrpGahqaiOkqCUDYyM7FYG2AsQ84dLeSAESHFNUUeVz7f05JNQBxoYFOjkR0Ck0NsG0B9JwAUb0dumtJAKJDiqqLiA6KdnYYDrU3vxyAAXEhTo5EdArb34bK4w4/+oc2JAClVKJSap1Sar9Sap9Sao5ZHqGUWq2UyjSfw81ypZSar5TKUkrtVkqNaPFZM83tM5VSM+33tYSjFFYXEh3oWQlg69FSIrr40TMq2NmhCHdXmg1fvwApP4E+Vzt89205A2gEHtFapwJjgdlKqVRgLrBWa90HWGu+BpgM9DEfs4A3wEgYwNPAGGA08HRz0hDu6VTDKaobqz2qCUhrzbbsEkYkheMl8wCJjmioheX3G23/U+c57OKvllpNAFrrAq31DnO5EjgAxAM3AgvNzRYCN5nLNwKLtGEzEKaUigOuAVZrrUu01qXAauBam34b4VBZZVkAJIUkOTkSx8kpqSanpJrxfT1r2KuwMa3hi8cgZyPc8CpE9nJKGJfUB6CUSgaGA1uAWK11gbnqBBBrLscDuS3elmeWXaj83H3MUkptV0ptLyoqupTwhIPtKdoDwIBIx8xc6Ar+fcj4TV7eyzHztYtOavMbsGMhjJsDg291WhhtTgBKqWDgY+DXWuuKluu01hpsc2dwrfWbWus0rXVadLRntS27m/V560kJTaF7cHdnh+IwX+49QZ+YYHpFS/u/aKfNb8BXT0Dfa2HiM04NpU0JQCnli1H5L9Zaf2IWnzSbdjCfC83yfKDl3QwSzLILlQs3dKrhFBknM0hPSHd2KA5TeqqebdklXNU/xqOuexA2ojV891f4cq5R+d++CJx8S9G2jAJSwALggNb6lRarVgLNI3lmAitalN9tjgYaC5SbTUVfAZOUUuFm5+8ks0y4oQ35G2i0NnJlwpXODsVh3t+aQ0OT5qcjftRyKcTF1VfDp/fDmqdh4M1w+z/Bx/kXErZlLttxwF3AHqWUebcCfge8CCxVSt0LHANuN9etAqYAWUA1cA+A1rpEKfUcsM3c7o9a6xKbfAvhcMuzlhMTGMPwGMfNXOhMVqtm6fZcRqdE0L+b50x7IWzg2CZY+aAx0+f430L675x+5N+s1QSgtf4OuND57sTzbK+B2Rf4rLeBty8lQOF68irz2Hh8Iz8f9HN8vDxjPvxVews4ZqnmkUn9nB2KcBcVBbD2Wdj1AYQmwl3LoddVzo7qLJ7x1yts6q09b+GlvJjWb5qzQ3GIxiYrr6w+RO+YYK4bHOfscISrq62ADX+DTf8D2gqXPQjpc8Hf9a4clwQgLkluRS4rDq/glj630K1LN2eH4xALNx3jSNEp/j5jhNwEXlxY/SnIWAjfvQKnimDQrXDV7yGip7MjuyBJAOKSvJLxCj7Kh1lDZjk7FIfYeLiYP395kPR+0Vwz0DMSnrhEJ/cbs3nufB9qSiD5Spj+PiSOdnZkrZIEINps8YHFrMlZw+xhsz1i+odvM4uYtSiDhPBAXr5tqAz9FAZrExTshMzV8MMqKNgFyhv6XmM09/S43CnTOrSHJADRJsszl/PS1pdIT0jnF4N/4exw7G71/pPMXryD5KggFv/XWKKCnT9kTziJtQlO7oXsDXBkPeRugdoyQEFCGlz9RxgyHUJiW/sklyMJQFxUg7WBV3e8yjv73mFM3BheGv8S3l7ezg7LbmobmvjLVz+w4LujDIrvyrv3jJbK35NoDWU5cPx7OL7DeM7fAfVVxvqIXjBgqtHM03OCW1b6LUkCEBd08tRJHv/2cTJOZnBzn5v5/Zjf4+ft5+yw7EJrzVf7TvDiFwfJtlRzx+hEnpo6kEC/zpvsPF5jHRRnGkf3J/bAyX1Gc06NeXmSly/EpsKQaZA01niEda6JDyUBiPNal7OOZzY9Q3VDNc+Pe54be9/o7JDsQmvNpiMWXl2bxaYjFpIjg1j489H8pK/MQ9VpNFf0lkzjuXC/0XFryQLdZGzj7Q/R/aD/FOg+AuKGQbdBLnG1rj1JAhBn+aHkB+ZlzGPD8Q30Cu3FgkkL6B3u2NvUOYLWmo2HLcxfm8mWoyVEBfvz5NRU7r6sB77ernGVprgEWkNVIRQdhOJDUHIESo8Zlb7l8JmKHiCsB8SkwoDrIWaAsRzVB7x9nRe/k0gCEIDR3PPaztdYnrWcLr5deGTkI9w54E58O9kfRZNVs/bASd767ihbj5YQ2cWPJ6emcueYJAJ8pbnH5dWWG5V7yVEoPWpU7pYso+KvLT+znW+Q0VwT2ces6M1KPqIX+MtMrs0kAXi4qvoq3tzzJov3L6ZJNzFjwAx+MeQXRAREODs0m6qqa2TZ9lze3ZhNtqWabl0D+MN1A5gxtodU/K7E2gQVx42O2LJjxrPlsFnZZ0FN6dnbB8calfqgWyCqr/GIGQAhcW4zFNOZJAF4qMr6ShbuW8gHBz+gor6CG3rdwH1D7yMxJLH1N7sJrTU7cspYlpHHZ7uOU1XXyJCEUP42fRhTBsdJU48zWK1wqtCs4HOMir0022iuKc+F8nzjFomnKegaDxEpkHqjcVVteIrxOjzZJadXcCeSADxMRX0Fiw8sZtG+RVQ1VJGemM59Q+5jYNRAZ4dmM1mFVXy17wTLMvI4WnwKPx8vrhscx4yxSYxICpcLuuypuYIvzYbyPPORaxzVl2YbTTeNNWe/p0sMhPeA+JFGJR+ebDTfhCVDaAL4Bjj+e3gISQAeIrcyl3/u/yefZn1KTWMNP0n4CfcNvY9BUYOcHZpNnKyo5Ys9BSzbkcfefOOGdcOTwvjTTwdz/dA4QgI6V1+GU9WWm5X5kTOdraXZRpNNRQE01Z29vX8ohMYbna89J7So4BONMmmTdxpJAJ1cZmkmb+15iy+zv8QLLyanTGZG6gxSI1OdHVqHNFk1u/PKWHPgJGsPFHLwRCUA/WJD+N2U/kwd0p3uYYFOjtJN1ZQZR+1lOWbFnnPmaL4s58w4+WZBkUalHj8SBnSH0CTjdWiCUfEHhDrhS4i2kATQCWmt+S7/OxYfWMyG4xsI8A7g7tS7uXPAnW49g2fpqXr+faiIdT8U8vWBQirrGvFSkNYjgt9e04+rU2PpGyttwq2qrTA7WHON5/I8s8kmF0qyoa787O19u5iVeQJ0H2ZU7s3t8BE9pR3ejUkC6EQq6iv47PBnLDm4hOyKbCIDInlg6ANM7z+d8IBwZ4d3yWrqm/g+p5TNRyx8m1XMrtwyrBoiuvhx9cBYftI3mp/0jSYsqHNendxutRVnjtqbR9KU5RgVfGn2j0fS+AQYTTKhiZAw6sxyWA+jkg8MlxE1nZQkADentWZX0S4+OvQRq4+tpqaxhtTIVJ4b9xxTUqa41dQNJ8pr2ZFTyvc5pWzNLmVvfjlNVo2XgiEJYTx4VR+u6h/DkPhQvDx1Xv6GWqjINx7lecaomYo8o+29ufzcCt7b32xvTzKucA3vYVTuYUnGc5coqeA9lCQAN1VQVcCqo6v4NOtTsiuyCfQJ5OoeVzO933QGRQ1y+ZEutQ1N7MwtY29+OTtyStmVW05+mTE6xM/bi8EJofxyfE/SksMZ2SOC0MBO3olrbYJqC1QWGCNmKvKN58qTUHkcKk8Y686t3MFogw/pbjTRJI42K3bzKD48GbpESwUvzksSgBspqS1hXc46vsz+ks0FmwEYEj2EJ8c+yeSUyYT4uWZbrNWqOWo5xfc5RoW/O6+M3XnlNFo1APFhgQxLCuOeccmkJUcwIC4Ef59OdHFWQ61RiZeZwyGrThjTFlTkGxV7eb5RZm08+33K26i8u8YZFXnSZdC1+5lHaKLx7Cud3aJ9JAG4uIamBtbmrGXl4ZVsPL6RJt1EfHA8vxj8C67vdT0poSnODvEsVqsm23KK3Xnl7M0vZ09+OfuPV1BZZ1Rugb7eDOzelf+6siejksMZHB9KTFc3HufdWHfmiL0836joy1s00VQWGLcHPJdfMIR0M65YTb7CqORD4oyyrvHGIzgGOvHU28L5JAG4qCPlR1iRtYLlmcsprSslKjCKGQNmMLnnZFIjUl2micdSVce+4xXszivj+5wyMnJKKas2ruT08/EiNa4rNwzrzpCEUIYlhtM7Jth97qvb1GhU6BXHjbHuFXnGcnme2URTYDTboM9+n3+o0eYeEmeMmumaYByphyUaR+3BMTJyRrgESQAupNHayLrcdSw5uIStJ7aiUIxPGM9tfW/jivgrXOJGLCfKa9l8xMKWoyVsOWrhSNGp0+t6RndhUmosI5LCGZoYRq/oYPwLsLKFAAATwklEQVR8XHi6haaGMyNjmse8V5hNNc1Xr7acRRIgIMxoa+/aHboPP9P2Hhp/pqL36yJt7sItSAJwAdUN1SzPWs7CfQspOFVAbFAsvxr+K27qfZPT772bY6lmw+Fith0tYcvRktMdtcH+PoxOieDWkQkMSwhjYPdQQoNcsKO2qcGcFjjrzDTBxZnGc2UBZx29e/kYR+2hCcZ9XUMTjCP25uewJPALctpXEcLWJAE4UWltKf/c/08+/OFDKuorGBw1mN+O+i0TEifg4+X4/xqtNccs1WzNLmHLkRK2ZlvILTEq/KhgP4YnhXPPuGTG9oxkQFxX12nK0dqozJunJrBkQdEh47n06NmdqwFhxrTAPX9iVurmSJnwZKPd3QXOsoRwFEkATlBQVcDbe9/mk8xPqLfWk56Yzt2pd5MWm+bQtv0mqyarsIodOaVsOmxh0xELRZXGPC4RXfwYlRzOz8elcGWfKHpFBzu330FrOFV8Zs4ZS9aZR3EW1Fee2dbLFyJ7mXd4us6YIjiyt1HxB3Wuaa6F6AhJAA60q2gXC/Ys4N95/8YLL67vdT0zUmfQN7yvQ/Zf19jErlxjGOb27FI2HC6mstY4Oo4K9mdc70hGp0SQ1iOCPjHBzrnYqqYULEegLNtoqinONCr94sxzpihQRtNMZC8YdodRyUf0NB6hieAtP20hWiN/JXZW21jLv479i0+zPmXbiW2E+4czc+BM7uh3B3HBcXbbr9aavNIaduWVkXGslIxjpew/XnF67H1CeCBXD4hlbM9IRiaH0zOqi2OO8LU2xr6XHG4xk+TRM1MFnzXRmDrTRDP4FuPuThEpZ6YokPHvQnSIJAA7yS7P5uPMj/n40MdUNlQS1yWO34z8Dbf3vZ1gP9tPf1tZ28CevHK+zzUq/N15ZRRX1QPg7+PFsMQw/uvKngxNCGVEj3Bi7Tn2vrrEGCpZcsRormm+2UdZjrHccj545W0cyUekQOoNxiRjkb2NSj+ip3S6CmFHkgBsqKGpgTU5a1iRtYKNxzeilGJC4gTu6H8Ho7qNwkvZZkik1ppsSzUZx0rZnl3C9mOlHC6qQpsDWnpFd2F832iGJ4YxJCGMAXFdbTscU2vj4ibLYfNuTtnGEb0ly2i+OXc2yYBQYwRNRE/oNdGo3CNbNte44OghITyAJAAbqGms4ZPMT3h779sUVhcSHRjNL4b8gmn9ptlsGGdBeQ3fZhaz+YiFzYctHC+vBSAkwIdRyRFMHRLH8KRwhsSHEt7FRhPANdaDJRMKDxhDKC2HjdeWI2d3uoJRkUf2hiG3nZkLPqKnedu+rjIuXggXJAmgA+qb6ln6w1Le2vMWlloLQ6OH8tTYp2xy0VZdYxObj5Sw7mAh32UVk1VYBRijc0YnR/DAhChGJduos1Zr46Kngl1QuA8KD0LRQaPiP30hlNnpGtUHEseYHa69jKab0ES5bZ8QbqjVBKCUehuYChRqrQeZZRHAh0AykA3crrUuVUYv4t+AKUA18DOt9Q7zPTOBP5gf+7zWeqFtv4pjbT+xnWc3PUt2RTajuo3iz0P+zKhuozrUkdrYZGXDYQtf7Cng//YUUFnbSICvF6OSI5iWlsgVfaLo3y2k4521VYWQn2E88rZB/vdnN9uEJkF0X+gzCWJSIaa/0QErlbwQnUpbzgDeBf4HWNSibC6wVmv9olJqrvn6cWAy0Md8jAHeAMaYCeNpIA3j0ssMpdRKrfV55rZ1bdUN1byw5QVWHl5J9y7dmT9hPumJ6e2ulLXW7MgpY/n3eXy+u4Cy6gaC/LyZlBrL9UO7M653FAG+HTib0NrojD22EXI2Q84mo70ejA7Y2FQYeBN0G2w8YlIhoGv79yeEcButJgCt9TdKqeRzim8E0s3lhcB6jARwI7BIa62BzUqpMKVUnLntaq11CYBSajVwLfBBh7+BAx2wHODRfz9KTmUOPxv4M+4fej9Bvu0bpXLoZCXLv89n1Z4Cjlmq8fPx4urUWK4fEkd6v5j2V/pNjXBiN+RuhexvjedThca6wHBIHAsjZ0LCaKPClxtyC+Gx2tsHEKu1LjCXTwCx5nI8kNtiuzyz7ELlP6KUmgXMAkhKSmpneLZl1Vbe2/8e8zLmERYQxptXv8ll3S+75M8prKjli70n+GRHHrvyylEKxvWKYtb4ntwwtDshAe0YDVNVZDTjHP8ecjcbzTnNHbShSdB7IiSkQY9xENUPvFx4cjYhhEN1uBNYa62VUrr1Ldv8eW8CbwKkpaXZ7HPbK6cihyc3PMmOwh1cGX8lf7riT4QFhLX5/aWn6vl893G+3HeCTYctWDX0jgnmd1P6c9Ow+EubC99qNTpnj22A3C3G0X3ZMXOlgm6DYMjtxkRmiWOMTlsZfSOEuID2JoCTSqk4rXWB2cRjtjGQDyS22C7BLMvnTJNRc/n6du7bIbTWLD6wmFcyXsHP249nLnuGm/vc3Ka2/oYmK2v2n+T9rTlsOmyh0apJighi9oTeTB3Snb6xbZxXp7Eeju+A7O+M5pyWnbXBscbt/0bda9zIu9sQac4RQlyS9iaAlcBM4EXzeUWL8geVUkswOoHLzSTxFfAnpVS4ud0k4In2h21fxTXFPPHtE2wu2My47uN45vJn6NalW6vv+z6nlMVbcvjXvhNU1DYS29Wfe69IYeqQ7gyK79p6pa81nNgDmf8yKv3cLdBQbayLHQSDfmq03fe43BhfL0f3QogOaMsw0A8wjt6jlFJ5GKN5XgSWKqXuBY4Bt5ubr8IYApqFMQz0HgCtdYlS6jlgm7ndH5s7hF3N7qLdzFk3h/K6ch4f9Th3Driz1Yp78xELf11ziM1HSgjw9eKagd2YOqQ7E/pF4+PdSpu71QrZ38DB/4MDnxt3oAJjNM7wGZAy3ui4DY620TcUQgiD0trpzewXlJaWprdv3+6w/W07sY3Za2cT7h/O/Kvm0y+i30W3Lz1Vz+8/3cOqPSeICvbnl+N7Mn10Yts6c0/uh4x3Yf+nUHUSfAKgZ7oxfXHfa43bBgohRDsopTK01mmtbSdXApuySrN46OuH6NalGwsmLSA66OJH3BsPF/Po0l0UVdXx8NV9mTW+Z+tDNxvrjQp/8xtG276XL/S9Bgb+FPpNNm4lKIQQDiIJAGNKh8e+fQx/b3/+8R//uGjlX1HbwLzVh1i4MZuE8CCW3Xc5QxNbGRVkOQwZ78COf0JtmTHj5dXPwdA7pGlHCOE0kgCAl7e/TGZpJvMnzL/oHP0bsop59KNdnKioZfqoJH43pf+Fm3u0NjpyN843OnWVF/SfCiNmQq+rZDy+EMLpPD4BfHToIz44+AEzBsxgQtKE825TXt3An1Yd4MPtuSRFBLH0l5cxKvkCtxYsy4UDK+H796BwPwRFwvjfQtrPoWt3O34TIYS4NB6dADYd38Tzm5/n8u6X83Daw+ff5rCFR5bu5ERFLbPG9+Thq/v+uK2/oRb2LYdd78PRb4yybkNgyssw7E65qYkQwiV5bALIr8rnN+t/Q3LXZOalz8PX6+ymnJJT9fzxs318uvM4iRGBfHTf5YzsEX72h5Qcha3/CzsXG237oUnG0f6Qaca0yUII4cI8MgHUNtbyyPpHAHj9P17/0YRu6w4W8tjHuymvbuCB9F48eFVvgvzMfyqr1WjT3/YWHF4LKOg/BUbeAz0nSNu+EMJteGQCePX7V9ln2cdf0/9KfPCZOemarJq/rc1k/tpMescE887PRjEoPtRYqTUc+Ay++Ysx22aXaLjiYWNmzTDXmLROCCEuhcclgJ2FO3nvwHvc0ucWJvaYeLq85FQ9c5Z8z7eZxdw8Ip4XbhpMoJ83WJvg4Ofw3Txjxs3wZLjpDRh0K/jY6NaLQgjhBB6VAGoaa5j77VziusTxSNojp8t35ZZx/3sZFFfV88cbB3LX2B4ogMzVsPopYzRPeDLc8KrRqdvB2z0KIYQr8KgE8M7ed8ivymfBpAWE+IUAsGpPAb/+cCfRwf58dN9lxkVdR7+Btc9B3lYI6wE//QcMvk0qfiFEp+IxCSCvMo8FexZwbfK1jI4bTXV9I0+v2MdHGXkMSwxjwcw0Iou3wbsvGlMvh8QZwzhHzJSmHiFEp+QRCUBrzUtbXwLg0bRHKSivYdaiDPbkl/PL8Sk80rsAv2W3GBV/lxi49kVjVI/cBF0I0Yl5RAJYdXQV6/PW8/DIhzlZ6s+9CzdQXdfIh1O8GHNoDmzdCkFRMOkF4wYrvoHODlkIIeyu0yeA4ppiXtz6IoOjBpPsO5npb24mMaCONf1XEvr1MgjuZjT1DJ8hFb8QwqN0+gQwf8d8qhqquKv3b5m9eCf/EZLHK97z8MksgMt/BeMfg4Cuzg5TCCEcrlMngPK6cj4/8jnXJN3Asx+XcI3fLl6pm4cKioSff2ncU1cIITxUp04Anx3+jAZrA3sOpDK6dgOveM9DxaTCXcvljltCCI/XqSeuWXV0FdF+KXTNOcY8n/mouGFwzxdS+QshBJ04AZw8dZI9xXvwPhHBu/4v4xXZC+5cJu39Qghh6rQJ4OvcrwF4/tQ3eHWNQ929ArpEOjkqIYRwHZ22D+CzH9YR1aBIa6rAe9onENLN2SEJIYRL6ZRnAFprDpfsYFxtJdYpL0P3Yc4OSQghXE6nTABlteVUe9URo8PwTZvp7HCEEMIldcoEkGc5bix07QdKOTcYIYRwUZ0yAZSWnwAgICDKyZEIIYTr6pQJoLK6krCmJkL8ZMinEEJcSKccBTQyPIlvc/I50W+gs0MRQgiX1SnPALoFGe3+3SLDnRyJEEK4rk6ZAPAPgdSbIDTe2ZEIIYTL6pRNQET2gtsXOjsKIYRwaQ4/A1BKXauU+kEplaWUmuvo/QshhDA4NAEopbyB14DJQCpwh1Iq1ZExCCGEMDj6DGA0kKW1PqK1rgeWADc6OAYhhBA4PgHEA7ktXueZZUIIIRzM5UYBKaVmKaW2K6W2FxUVOTscIYTotBydAPKBxBavE8yy07TWb2qt07TWadHR0Q4NTgghPImjE8A2oI9SKkUp5QdMB1Y6OAYhhBA4+DoArXWjUupB4CvAG3hba73PkTEIIYQwKK21s2O4IKVUEXCsAx8RBRTbKBxHcceYQeJ2NHeM2x1jBveMu4fWutU2dJdOAB2llNqutU5zdhyXwh1jBonb0dwxbneMGdw37rZwuVFAQgghHEMSgBBCeKjOngDedHYA7eCOMYPE7WjuGLc7xgzuG3erOnUfgBBCiAvr7GcAQgghLqBTJgBXmHJaKfW2UqpQKbW3RVmEUmq1UirTfA43y5VSar4Z726l1IgW75lpbp+plJrZonykUmqP+Z75Sillg5gTlVLrlFL7lVL7lFJz3CTuAKXUVqXULjPuZ83yFKXUFnNfH5oXH6KU8jdfZ5nrk1t81hNm+Q9KqWtalNvlN6WU8lZKfa+U+tyNYs42/w93KqW2m2Uu/RsxPzdMKbVMKXVQKXVAKXWZO8RtV1rrTvXAuMDsMNAT8AN2AalOiGM8MALY26Lsz8Bcc3ku8JK5PAX4AlDAWGCLWR4BHDGfw83lcHPdVnNbZb53sg1ijgNGmMshwCGMabtdPW4FBJvLvsAWcx9Lgelm+d+B+83lB4C/m8vTgQ/N5VTz9+IPpJi/I297/qaAh4H3gc/N1+4QczYQdU6ZS/9GzM9dCPyXuewHhLlD3PZ8OD0Am38huAz4qsXrJ4AnnBRLMmcngB+AOHM5DvjBXP4HcMe52wF3AP9oUf4PsywOONii/KztbBj/CuBqd4obCAJ2AGMwLt7xOfd3gXEl+mXmso+5nTr3t9K8nb1+UxhzYa0FrgI+N2Nw6ZjNz8rmxwnApX8jQChwFLPf013itvejMzYBufKU07Fa6wJz+QQQay5fKOaLleedp9xmzCaG4RhH0y4ft9mUshMoBFZjHP2Waa0bz7Ov0/GZ68uByHZ8n476K/AYYDVfR7pBzAAa+JdSKkMpNcssc/XfSApQBLxjNrm9pZTq4gZx21VnTABuQRuHCS45BEspFQx8DPxaa13Rcp2rxq21btJaD8M4qh4N9HdySBellJoKFGqtM5wdSztcobUegXFnv9lKqfEtV7rob8QHo0n2Da31cOAURpPPaS4at111xgTQ6pTTTnRSKRUHYD4XmuUXivli5QnnKe8wpZQvRuW/WGv9ibvE3UxrXQasw2gCCVNKNU942HJfp+Mz14cCllbitvVvahxwg1IqG+POeFcBf3PxmAHQWuebz4XAcoyE6+q/kTwgT2u9xXy9DCMhuHrc9uXsNihbPzAy/RGMU77mzq+BToolmbP7AP7C2R1OfzaXr+PsDqetZnkERrtluPk4CkSY687tcJpig3gVsAj46znlrh53NBBmLgcC3wJTgY84u0P1AXN5Nmd3qC41lwdydofqEYzOVLv+poB0znQCu3TMQBcgpMXyRuBaV/+NmJ/7LdDPXH7GjNnl47bnw+kB2OVLGT34hzDagX/vpBg+AAqABoyjj3sx2mzXApnAmhY/HAW8Zsa7B0hr8Tk/B7LMxz0tytOAveZ7/odzOrfaGfMVGKfAu4Gd5mOKG8Q9BPjejHsv8JRZ3tP8o8zCqFj9zfIA83WWub5ni8/6vRnbD7QYxWHP3xRnJwCXjtmMb5f52Nf8ua7+GzE/dxiw3fydfIpRgbt83PZ8yJXAQgjhoTpjH4AQQog2kAQghBAeShKAEEJ4KEkAQgjhoSQBCCGEh5IEIIQQHkoSgBBCeChJAEII4aH+Hy1w1LLsud+sAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%time\n",
    "gen_null(train_x, unlabel_x,valid)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.分离train和valid中的int、float数据类型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_int_float(train_x, unlabel_x, valid):\n",
    "    '''\n",
    "    描述：\n",
    "        1.分离train和valid中的int、float数据类型，这个操作的意义不大。因为并不是说int类型就一定代表类型变量，float变量也可能是离散的。\n",
    "        2.对int类型变量的探索，见 '/2.explore_data/1.explore_int'\n",
    "    参数:\n",
    "        input:\n",
    "            train_x(DataFrame),unlabel_x(DataFrame),valid(DataFrame)\n",
    "        output:\n",
    "            对应的csv\n",
    "    \n",
    "    '''\n",
    "    int_cols = [key for key,val in train_x.dtypes.to_dict().items() if val==np.int64]\n",
    "    float_cols = ['id']+[key for key,val in train_x.dtypes.to_dict().items() if val==np.float64]\n",
    "    vint_cols = [key for key,val in train_x.dtypes.to_dict().items() if val==np.int64 and key!='tag'] # valid中没有'tag'\n",
    "    train_x_int = train_x.loc[:,int_cols]\n",
    "    train_x_float = train_x.loc[:,float_cols]\n",
    "\n",
    "    unlabel_x_int = unlabel_x.loc[:,int_cols]\n",
    "    unlabel_x_float = unlabel_x.loc[:,float_cols]\n",
    "\n",
    "    valid_int = valid.loc[:,vint_cols]\n",
    "    valid_float = valid.loc[:,float_cols]\n",
    "    \n",
    "    train_x_int.to_csv('./preprocess_data/trian_x_int.csv', index = False)\n",
    "    train_x_float.to_csv('./preprocess_data/trian_x_float.csv', index = False)\n",
    "    unlabel_x_int.to_csv('./preprocess_data/unlabel_x_int.csv', index = False)\n",
    "    unlabel_x_float.to_csv('./preprocess_data/unlabel_x_float.csv', index = False)\n",
    "    valid_int.to_csv('./preprocess_data/valid_int.csv', index = False)\n",
    "    valid_float.to_csv('./preprocess_data/valid_float.csv', index = False)\n",
    "    gc.collect()\n",
    "    return train_x_float, unlabel_x_float, valid_float"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 7min 16s, sys: 13.3 s, total: 7min 29s\n",
      "Wall time: 7min 30s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "train_x_float, unlabel_x_float, valid_float = split_int_float(train_x, unlabel_x, valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
